library(tidyverse)
library(ggplot2)
library(ggthemes)
library(extrafont) # loadfonts(device = "win")
library(plotly)
library(DT)
# Load the survey extract; the first row of the file supplies the column names.
survey <- read_csv(file = "survey_SCF.txt", col_names = TRUE)

# Per-year totals of income and of every debt / payment column.
# (INCOME used to be listed twice in select(); a column is only selected once,
# so listing it a single time yields the same table.)
byyear <- survey %>%
  select(
    YEAR, INCOME, NH_MORT, OTHLOC, CCBAL,
    INSTALL, EDN_INST, VEH_INST, TPAY
  ) %>%
  group_by(YEAR) %>%
  summarise_all(.funs = sum)

# Same columns as `byyear`, averaged within each year instead of summed.
avgbyyear <- survey %>%
  select(
    YEAR, INCOME, NH_MORT, OTHLOC, CCBAL,
    INSTALL, EDN_INST, VEH_INST, TPAY
  ) %>%
  group_by(YEAR) %>%
  summarise_all(.funs = mean)
# Connected line chart of the yearly EDN_INST total from `byyear`, shown in
# millions, with one filled marker per survey year.
chart1 <- ggplot(
  data = byyear,
  # YEAR is drawn as a discrete axis; group = 1 keeps all years on one line.
  mapping = aes(x = factor(YEAR), y = EDN_INST/1000000, group = 1)
) +
  geom_line() +
  geom_point(size = 4, shape = 23, fill = "steelblue4") +
  labs(
    x = "Years",
    y = "Education Loans ($ in the Millions)",
    title = "Total Student Debt over the Years",
    subtitle = "This simple connected line chart is effective in helping draw attention to the dramatic increase
of student loan debt over the survey years."
  ) +
  scale_y_continuous(labels = scales::comma_format()) +
  # theme_minimal() must come first: it is a complete theme and would otherwise
  # overwrite the text tweaks below.
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16, color = "slategray"),
    text = element_text(family = "Garamond", size = 11, color = "grey20")
  )
chart1
# Stacked bar chart of the per-year average of four debt columns from
# `avgbyyear` (NH_MORT, VEH_INST, EDN_INST, CCBAL), shown in thousands.
chart2 <- avgbyyear %>%
  # Long form: one row per year and debt column
  # (loan_type = column name, loan_amount = its yearly average).
  gather(loan_type, loan_amount, c(NH_MORT, VEH_INST, EDN_INST, CCBAL)) %>%
  ggplot() +
  aes(fill = loan_type, x = factor(YEAR), y = loan_amount/1000, label = loan_type) +
  geom_bar(position = "stack", stat = "identity") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray")
  ) +
  # Title previously read "for the Average of Person".
  labs(
    title = "Amount of Debt for the Average Person",
    subtitle = "A stacked bar chart is helpful to see the ratio of different debt for an average person and how it changes
over the years. ",
    x = "Years", y = "Total Debt ($ in the thousands)"
  ) +
  # Legend labels are positional and follow the alphabetical order of the
  # loan_type values: CCBAL, EDN_INST, NH_MORT, VEH_INST.
  scale_fill_economist(
    name = "Loan Types",
    labels = c("Credit Cards", "Education", "Mortgage", "Vehicle")
  ) +
  scale_y_continuous(labels = scales::comma_format())
chart2
# Rows of the survey whose YEAR is 2016; the 2016-only charts start from this.
survey_2016 <- survey %>%
  filter(YEAR == 2016)
# Box plot with jittered points of 2016 INCOME by education class (EDCL) for
# the rows matching one demographic profile
# (AGECL 1, HHSEX 2, FAMSTRUCT 2, RACE 5).
chart3 <- ggplot(
  # Comma-separated conditions in filter() are combined with a logical AND.
  data = filter(survey_2016, AGECL == 1, HHSEX == 2, FAMSTRUCT == 2, RACE == 5),
  mapping = aes(x = factor(EDCL), y = INCOME, fill = factor(EDCL))
) +
  geom_boxplot() +
  geom_jitter(size = 0.4, alpha = 0.9, color = "slategray") +
  scale_fill_economist() +
  # Positional labels: presumably exactly three EDCL values survive the filter;
  # verify against the data.
  scale_x_discrete(labels = c("HS/GED", "Some College", "College")) +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    x = NULL,
    y = "Annual Income ($)",
    caption = "Year: 2016",
    title = "What if I didn't go to college?",
    subtitle = "I am interested in looking at the income and education of people similar to me in this chart. Therefore, I
filtered for my demographics and then feed it into a boxplot. Boxplots with jitter points are good in this
situation because it helps to see the full distribution."
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16, color = "slategray"),
    text = element_text(family = "Garamond", size = 11, color = "grey20"),
    legend.position = "none"
  )
chart3
# Jittered scatter of 2016 student-loan balances (EDN_INST, in thousands)
# against the KIDS value, coloured by education class (EDCL).
# Only rows whose FAMSTRUCT code is 1, 4 or 5 are plotted.
chart4 <- survey_2016 %>%
  # `FAMSTRUCT == 1,4,5` handed filter() the bare numbers 4 and 5 as extra
  # conditions, which are not logical vectors; %in% tests set membership.
  filter(FAMSTRUCT %in% c(1, 4, 5)) %>%
  ggplot() +
  aes(x = factor(KIDS), y = EDN_INST/1000, color = factor(EDCL)) +
  geom_jitter() +
  labs(
    title = "Family and Student Loans in 2016",
    subtitle = "Using a scatter plot is dynamic visually in this situation because it highlights the density of the X variable.
Having the third dimension as color helps further separate the loan amount per Education level
for the reader.",
    x = "Amount of Kids",
    y = "Student Loan Debt ($ in the thousands)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray")
  ) +
  # Legend labels are positional: assumes four EDCL values are present after
  # filtering, in ascending order; confirm against the data.
  scale_color_economist(
    name = "Education Level",
    labels = c("NO HS/GED", "HS/GED", "Some College", "College")
  ) +
  scale_y_continuous(labels = scales::comma_format())
chart4
# 2016 averages of income and each debt / payment column for every
# combination of net-worth category (NWCAT) and education class (EDCL).
# (INCOME was listed twice in select(); once is equivalent.)
avgedu_survey <- survey_2016 %>%
  select(
    INCOME, NH_MORT, OTHLOC, CCBAL, INSTALL,
    EDN_INST, VEH_INST, TPAY,
    NWCAT, EDCL
  ) %>%
  group_by(NWCAT, EDCL) %>%
  summarise_all(.funs = mean)
# Grouped (dodged) bar chart of the 2016 average EDN_INST from
# `avgedu_survey`: one cluster per NWCAT value, one bar per EDCL value.
chart5 <- ggplot(
  data = avgedu_survey,
  mapping = aes(x = factor(NWCAT), y = EDN_INST, fill = factor(EDCL))
) +
  geom_bar(stat = "identity", position = "dodge") +
  # Axis and legend labels are positional: five NWCAT values and four EDCL
  # values are expected, in ascending order.
  scale_x_discrete(labels = c("0-24.9", "25-49.9", "50-74.9", "75-89.9", "90-100")) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_fill_economist(
    name = "Education Level",
    labels = c("NO HS/GED", "HS/GED", "Some College", "College")
  ) +
  labs(
    x = "Household Net Worth Percentile",
    y = "Student Loans ($)",
    title = "Changes in Average Student Loans by Education in 2016",
    subtitle = "Group bar charts are helpful here in that they help to easily put the households and Education Levels into data
bins. We see that the lower class bears the majority of the burden when it comes to student loan debt. "
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16, color = "slategray"),
    text = element_text(family = "Garamond", size = 11, color = "grey20"),
    legend.position = "bottom"
  )
chart5
# 2016 averages of income and each debt / payment column for every
# combination of net-worth category (NWCAT) and RACE.
# NOTE(review): despite the name, this groups by RACE rather than an age
# column, and nothing in the visible part of the script reads it.
avgage_survey <- survey_2016 %>%
  select(
    INCOME, NH_MORT, OTHLOC, CCBAL, INSTALL,
    EDN_INST, VEH_INST, TPAY,
    NWCAT, RACE
  ) %>%
  group_by(NWCAT, RACE) %>%
  summarise_all(.funs = mean)
# Bubble chart of the 2016 rows: EDN_INST on x, AGE on y, point size from the
# income category (INCCAT) and colour from the education class (EDCL).
chart6 <- survey_2016 %>%
  ggplot() +
  aes(y = AGE, x = EDN_INST, size = INCCAT, color = factor(EDCL)) +
  # Heavy transparency so overlapping points stay readable.
  geom_point(alpha = .1) +
  # The six labels are fixed, so the breaks are pinned to match them instead
  # of relying on the automatic break finder returning exactly six values.
  # Assumes INCCAT is coded 1-6; confirm against the codebook.
  scale_size(
    range = c(1, 10), name = "Income Percentile",
    breaks = 1:6,
    labels = c("0-20", "20-39.9", "40-59.9", "60-79.9", "80-89.9", "90-100")
  ) +
  scale_x_continuous(labels = scales::comma_format()) +
  scale_color_economist(
    name = "Education Level",
    labels = c("NO HS/GED", "HS/GED", "Some College", "College")
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray")
  ) +
  # Subtitle previously read "a wealth information".
  labs(
    title = "Age, Wealth, and Education in 2016",
    subtitle = "A 4 dimensional bubble chart might be overwhelming initially to the readers, but it contains a wealth of information.
It shows concentration of loans for 3 different categorical variables. I tried to lessen the initial density of the
materials by choosing a very minimal theme and playing around with the transparency
",
    x = "Education Loans ($)", y = "Age"
  )
chart6
# Yearly mean of EDN_INST and INCOME over the rows flagged BNKRUPLAST5 == 1.
broke_survey <- survey %>%
  filter(BNKRUPLAST5 == 1) %>%
  group_by(YEAR) %>%
  summarise(
    EDN_INST = mean(EDN_INST),
    INCOME = mean(INCOME)
  )
# Dual-axis line chart from `broke_survey`: mean EDN_INST on the left axis and
# mean INCOME on the right axis, both by YEAR.
chart7 <- ggplot(data = broke_survey, mapping = aes(x = YEAR)) +
  geom_line(mapping = aes(y = EDN_INST, color = "Education Loans")) +
  # INCOME is drawn at one tenth of its value so both lines share the primary
  # axis; the secondary axis multiplies by 10 to show the real figures.
  geom_line(mapping = aes(y = INCOME/10, color = "Income")) +
  scale_y_continuous(
    name = "Student Loans ($)",
    labels = scales::comma_format(),
    sec.axis = sec_axis(~ . * 10, name = "Income ($)", labels = scales::comma_format())
  ) +
  scale_colour_economist(name = NULL) +
  labs(
    x = "Year",
    title = "Factors of Bankruptcy",
    subtitle = "To examine student loans as a factor of bankruptcy, I harnessed a dual Y-axis to show how the two variables
interact with one another. However, having two different Y-axis values can be confusing for readers
who are not graphically inclined. This type of graph is best for a more knowledgeable audience."
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 16, color = "slategray"),
    text = element_text(family = "Garamond", size = 11, color = "grey20"),
    # Extra outer margin keeps the tick labels of both y axes off the edges.
    axis.text.y = element_text(size = 8, margin = margin(t = 0, r = 0, b = 0, l = 10)),
    axis.text.y.right = element_text(size = 8, margin = margin(t = 0, r = 20, b = 0, l = 0))
  )
chart7
# Reshape the three food-spending columns into long form: one row per survey
# row and food column (food_type = column name, food_amount = its value).
bankrupt_food <- survey %>%
  gather(food_type, food_amount, c(FOODAWAY, FOODHOME, FOODDELV))

# Label the bankruptcy flag by code, not by position. Assigning the labels
# with `levels<-` gave the first label ("Declared Bankruptcy*") to the lowest
# code, while the yearly bankruptcy summary elsewhere in this script selects
# the bankrupt rows with BNKRUPLAST5 == 1, so the two labels were swapped.
# Listing code 1 first keeps "Declared Bankruptcy*" as the first level.
# Assumes the flag is coded 0/1; confirm against the codebook.
bankrupt_food$BNKRUPLAST5 <- factor(
  bankrupt_food$BNKRUPLAST5,
  levels = c(1, 0),
  labels = c("Declared Bankruptcy*", "Has Not Declared Bankruptcy*")
)
levels(bankrupt_food$BNKRUPLAST5)
## [1] "Declared Bankruptcy*" "Has Not Declared Bankruptcy*"
# Faceted point chart from `bankrupt_food`: food_amount for each food_type,
# one panel (and one colour) per level of the BNKRUPLAST5 factor.
chart8 <- bankrupt_food %>%
  ggplot() +
  aes(x = food_type, y = food_amount, color = as.factor(BNKRUPLAST5)) +
  geom_point(size = 3) +
  facet_wrap(~BNKRUPLAST5) +
  scale_color_economist() +
  theme_minimal() +
  # The panel strips already name the groups, so the colour legend is hidden.
  theme(
    legend.position = "none",
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray")
  ) +
  # Subtitle previously read "between does who has and has not declared" and
  # "help visually".
  labs(
    title = "Food and Bankruptcy",
    subtitle = "This facet wrap chart shows the differences in spending on food between those who have and have not declared
bankruptcy in the past 5 years. Seeing the two next to each other helps visually when we are trying to
compare two types.",
    x = "Food Type", y = "Food Amount ($)",
    caption = "*In the past 5 years"
  ) +
  # Positional labels follow the alphabetical order of the food_type values:
  # FOODAWAY, FOODDELV, FOODHOME.
  scale_x_discrete(labels = c("Away from Home", "Delivery", "At Home")) +
  scale_y_continuous(labels = scales::comma_format())
chart8
The “Family and Student Loans” and the “Average Amount of Debt” charts are good candidates for interactivity because both contain dense, stacked information. In the scatter plot, many points lie on top of one another; with interactivity, the reader can inspect each point individually. In the stacked chart, reading off an amount is hard because each category starts from a different baseline; with interactivity, the reader can simply hover over a segment. However, my labels and other customizations in the ggplot2 code did not carry over when translating to Plotly. Even so, Plotly is a nice beginner tool and is widely used, so it is worth considering for visualization projects.
# Interactive (plotly) renderings of the scatter plot and the stacked bars.
ggplotly(p = chart4)
ggplotly(p = chart2)
I decided to make an interactive data table that focuses on demographic variables by year so that readers can easily filter the data themselves. Data tables can be overwhelming, which is why I chose a simple format and only five variables of interest, keeping the table easy to navigate.
# Totals of INCOME and DEBT for every YEAR / AGE / education combination.
# EDUC is renamed to EDUCATION while it is selected.
# NOTE(review): this object shares its name with the datatable() function
# called on it later in the script.
datatable <- survey %>%
  select(YEAR, AGE, EDUCATION = EDUC, INCOME, DEBT) %>%
  group_by(YEAR, AGE, EDUCATION) %>%
  summarise_all(.funs = sum)

# Turn the education code into a factor and relabel its levels. The labels are
# assigned by position, so there must be exactly 15 distinct codes, in
# ascending order.
datatable[["EDUCATION"]] <- as.factor(datatable[["EDUCATION"]])
levels(datatable[["EDUCATION"]]) <- c(
  "LESS THAN 1ST GRADE",
  "1ST, 2ND, 3RD, OR 4TH GRADE",
  "5TH OR 6TH GRADE",
  "7TH OR 8TH GRADE",
  "9TH GRADE",
  "10TH GRADE",
  "11TH GRADE",
  "12TH GRADE, NO DIPLOMA",
  "HIGH SCHOOL GRADUATE - HIGH SCHOOL DIPLOMA OR EQUIVALENT",
  "SOME COLLEGE BUT NO DEGREE",
  "ASSOCIATE DEGREE IN COLLEGE - OCCUPATION/VOCATION PROGRAM",
  "ASSOCIATE DEGREE IN COLLEGE - ACADEMIC PROGRAM",
  "BACHELOR'S DEGREE (FOR EXAMPLE: BA, AB, BS)",
  "MASTER'S DEGREE",
  "DOCTORATE OR PROFESSIONAL SCHOOL DEGREE"
)
levels(datatable[["EDUCATION"]])
## [1] "LESS THAN 1ST GRADE"
## [2] "1ST, 2ND, 3RD, OR 4TH GRADE"
## [3] "5TH OR 6TH GRADE"
## [4] "7TH OR 8TH GRADE"
## [5] "9TH GRADE"
## [6] "10TH GRADE"
## [7] "11TH GRADE"
## [8] "12TH GRADE, NO DIPLOMA"
## [9] "HIGH SCHOOL GRADUATE - HIGH SCHOOL DIPLOMA OR EQUIVALENT"
## [10] "SOME COLLEGE BUT NO DEGREE"
## [11] "ASSOCIATE DEGREE IN COLLEGE - OCCUPATION/VOCATION PROGRAM"
## [12] "ASSOCIATE DEGREE IN COLLEGE - ACADEMIC PROGRAM"
## [13] "BACHELOR'S DEGREE (FOR EXAMPLE: BA, AB, BS)"
## [14] "MASTER'S DEGREE"
## [15] "DOCTORATE OR PROFESSIONAL SCHOOL DEGREE"
# Render the summary as an interactive table: no row names, a filter box on
# top of every column, the search box captioned "Filter:", and the YEAR column
# styled as bold white text on a dark blue background.
# `datatable(...)` resolves to the function even though a data object of the
# same name exists, because R skips non-function objects when looking up a call.
datatable(
  data = datatable,
  rownames = FALSE,
  colnames = colnames(datatable),
  filter = list(position = "top"),
  options = list(language = list(sSearch = "Filter:"))
) %>%
  formatStyle(
    columns = "YEAR",
    color = "white",
    backgroundColor = "darkblue",
    fontWeight = "bold"
  )